import csv

def _extract_url(url_part):
    """Return the destination of a leading Markdown link, else the text unchanged.

    ``[text](url)`` yields ``url``.  The destination is located from the
    ``](`` separator rather than from the first parenthesis in the string, so
    parentheses inside the link text do not confuse the extraction, and
    balanced parentheses inside the URL itself are kept.
    """
    if not url_part.startswith('['):
        return url_part
    separator = url_part.find('](')
    if separator == -1:
        return url_part

    start = separator + 2
    end = -1
    depth = 0
    for index in range(start, len(url_part)):
        char = url_part[index]
        if char == '(':
            depth += 1
        elif char == ')':
            if depth == 0:
                end = index
                break
            depth -= 1
    if end == -1:
        # Unbalanced parentheses: fall back to the first closing one.
        end = url_part.find(')', start)

    # An empty or unterminated destination leaves the text as it was.
    return url_part[start:end] if end > start else url_part


def _parse_country_sections(lines, default_country):
    """Group city entries under their country headers.

    Parameters:
        lines: iterable of text lines (trailing newlines are allowed).
        default_country: country used for city lines that appear before any
            ``**Country**`` header line.

    Returns:
        A list of ``{'country': str, 'cities': [{'city': str, 'url': str}]}``
        dicts in order of first appearance.  A header that repeats an earlier
        country name re-opens that country's section instead of creating a
        duplicate entry.
    """
    countries = []
    by_name = {}
    current = None

    def open_section(name):
        section = by_name.get(name)
        if section is None:
            section = {'country': name, 'cities': []}
            by_name[name] = section
            countries.append(section)
        return section

    for raw_line in lines:
        line = raw_line.strip()

        # Country header line: **Country name**
        if line.startswith('**') and line.endswith('**'):
            name = line.strip('*').strip()
            if name:  # a line made only of asterisks names no country
                current = open_section(name)
            continue

        # City line: "- <city> 餐厅: <url or markdown link>"
        if not line.startswith('- ') or '餐厅:' not in line:
            continue

        # Split on the first marker only, so a marker that reappears in the
        # link text or URL does not cause the line to be discarded.
        city_part, _, url_part = line.partition('餐厅:')
        if current is None:
            current = open_section(default_country)
        current['cities'].append({
            # Removes the list bullet plus any surrounding hyphens/spaces.
            'city': city_part.strip('- '),
            'url': _extract_url(url_part.strip()),
        })

    return countries


def _country_filename(country):
    """Return the per-country CSV file name for *country*.

    Spaces and path separators are replaced with underscores so the name
    always refers to a file directly inside the output directory.
    """
    safe = country
    for char in (' ', '/', '\\'):
        safe = safe.replace(char, '_')
    return f"{safe}_restaurants.csv"


def _write_csv(path, fieldnames, rows):
    """Write *rows* (dicts keyed by *fieldnames*) to *path* as UTF-8 CSV with a header."""
    with open(path, 'w', encoding='utf-8', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)


def parse_city_urls(input_file, output_dir, default_country="乌兹别克斯坦"):
    """
    Parse a city-URL listing, attach a country to every city, and write CSV files.

    The input is read line by line: ``**Country**`` lines start a country
    section and ``- <city> 餐厅: <url>`` lines add a city to the current
    section (the URL may be a Markdown link ``[text](url)``).  All other
    lines are ignored.

    Outputs written to ``output_dir``:
      * ``all_cities_with_countries.csv`` with columns country, city, url
      * one ``<country>_restaurants.csv`` per country with columns city, url

    A summary of what was parsed and written is printed to stdout.

    Parameters:
        input_file: path of the input listing (UTF-8, with or without BOM)
        output_dir: output directory path; created if it does not exist
        default_country: country assigned to city lines that precede the
            first country header
    """
    # Kept function-local, as the module's top-level imports only provide csv.
    import os

    # 'utf-8-sig' drops a leading BOM, which would otherwise stick to the
    # first line and stop it from being recognised.
    with open(input_file, 'r', encoding='utf-8-sig') as f:
        countries = _parse_country_sections(f, default_country)

    os.makedirs(output_dir, exist_ok=True)

    # Combined CSV covering every country.
    main_output_file = os.path.join(output_dir, 'all_cities_with_countries.csv')
    _write_csv(
        main_output_file,
        ['country', 'city', 'url'],
        (
            {
                'country': country_data['country'],
                'city': city_data['city'],
                'url': city_data['url'],
            }
            for country_data in countries
            for city_data in country_data['cities']
        ),
    )

    # One CSV per country; paths are remembered for the summary below.
    country_output_files = []
    for country_data in countries:
        country_output_file = os.path.join(
            output_dir, _country_filename(country_data['country'])
        )
        _write_csv(country_output_file, ['city', 'url'], country_data['cities'])
        country_output_files.append(country_output_file)

    print(f"处理完成！共解析了 {len(countries)} 个国家:")
    for country_data in countries:
        print(f"- {country_data['country']}: {len(country_data['cities'])} 个城市")
    print(f"主CSV文件已保存至: {main_output_file}")
    for country_data, country_output_file in zip(countries, country_output_files):
        print(f"{country_data['country']} 的CSV文件已保存至: {country_output_file}")

if __name__ == "__main__":
    # Script entry point: parse ./city_urls.csv and write the CSV files under ./output.
    parse_city_urls(input_file='city_urls.csv', output_dir='output')